<?php

require_once('./discuz_publish.php');

// Crawl one listing page per request, publish every article found on it, then
// redirect the browser to the next page.
//
// The page number comes from the "ii" query parameter (missing or 0 = first
// page). It is cast to int because it is embedded both in the URL fetched
// below and in the JavaScript redirect echoed at the end; the raw request
// value must not reach either place.
$ii = isset($_GET['ii']) ? (int) $_GET['ii'] : 0;
if ($ii < 0) $ii = 0;

$i = $ii;

$base_url = "http://www.linuxde.net/category/technical_extension" . ($i != 0 ? "/page/$i" : "");
$content = file_get_contents($base_url);

// file_get_contents() returns false when the page cannot be fetched; in that
// case there is nothing to log or parse.
if ($content !== false)
{
	// Keep a copy of the most recently fetched listing page next to this script.
	// $title is not assigned by this script before this point, so it is only
	// included when something else (e.g. the required file) has defined it.
	file_put_contents(dirname(__FILE__) . '/log', (isset($title) ? $title : '') . '{br}' . $content . '{br}');

	// Capture group 1 is the article URL, group 2 the article title.
	$found = preg_match_all('/<\/span> <a href="(.*?)" title="[^~]*?" rel="bookmark">(.*?)<\/a><\/h2>/i', $content, $matches);

	if ($found)
	{
		$hrefs = $matches[1];
		$titles = $matches[2];

		foreach ($hrefs as $k => $href)
		{
			$title = $titles[$k];
			$message = get_message($href);

			// Do not publish a post whose body could not be fetched or extracted.
			if (trim($message) === '') continue;

			// NOTE(review): 40 is presumably the target forum id - confirm against discuz_publish().
			discuz_publish($title, $message, 40);
		}
	}
}

// NOTE(review): the redirect is unconditional, so the crawl keeps advancing to
// the next page even when the current one yielded nothing.
$i++;
echo "<script type='text/javascript'>location.href='./linuxde_com_crawl.php?ii=" . ($i) . "'</script>";
exit(0);

/**
 * Fetch an article page and return its cleaned-up body text.
 *
 * The body is the markup between <div class="single-entry-content"> and the
 * next <div class="clear">, trimmed and passed through data_clean().
 *
 * @param string $v URL (or path) of the article page to fetch.
 * @return string The cleaned article body, or '' when the page cannot be
 *                fetched or does not contain the expected markup.
 */
function get_message($v)
{
	$data = file_get_contents($v);
	if ($data === false)
	{
		return '';
	}

	// The "s" modifier lets "." match newlines, so the body can span lines
	// without first deleting every "~" from the page (which used to corrupt
	// article text such as "~/.bashrc").
	if (!preg_match('/<div class="single-entry-content">(.*?)<div class="clear">/s', $data, $matches))
	{
		return '';
	}

	return data_clean(trim($matches[1]));
}

/**
 * Reduce an HTML fragment to mostly plain text.
 *
 * - <br>, <br/>, <br /> and opening <p> tags (with or without attributes)
 *   become newlines.
 * - Every other tag is removed, except tags whose name starts with "i"
 *   (this keeps <img>, and also <i>, <iframe>, <input>, ...).
 * - A handful of HTML entities are replaced by ASCII equivalents.
 *
 * @param string $data HTML fragment.
 * @return string The cleaned text.
 */
function data_clean($data)
{
	$data = (string) $data;

	// Line breaks and paragraph starts turn into newlines.
	$data = preg_replace('/<br\s*\/?>/i', "\n", $data);
	$data = preg_replace('/<p(?:\s[^>]*)?>/i', "\n", $data);

	// Strip remaining tags. [^>]* also matches newlines, so tags whose
	// attributes are spread over several lines are removed as well.
	$data = preg_replace('/<[^i][^>]*>/i', '', $data);

	// &ndash; maps to a hyphen (it used to be turned into a double quote).
	$data = str_replace(
		array('&nbsp;', '&ldquo;', '&rdquo;', '&ndash;'),
		array(' ', '"', '"', '-'),
		$data
	);

	return $data;
}



?>